﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlDoc = HtmlAgilityPack.HtmlDocument;
using WebFunctions;
using Newtonsoft.Json;
using System.IO;
using System.Data;
using System.Net;
using System.Windows.Forms;

namespace AutoIndustryFiles
{
    /// <summary>
    /// Scraper for the download section of the Yaskawa China web site.
    /// It registers a fixed two-level category tree, queries the site for the
    /// products below each second-level category and records one file link per
    /// row of every document listing table it finds.
    /// </summary>
    class yaskawaScraper : FileLinksScraper2
    {
        // Directory that GetDownloadUrls(ea, path) scans when no directory is given.
        private const string DefaultDumpDirectory = @"C:\Users\Administrator\Desktop\yaskawa\Dump-0517-15-31-55\yaskawa.com.cn\download";

        // Base URL handed to FormatUrl together with the value of each link's "old" attribute.
        private const string SiteRootUrl = @"http://yaskawa.com.cn/";
        private const string DownloadRootUrl = @"http://yaskawa.com.cn/download/";

        // {0} = id of a second-level category.
        private const string SubCategoryUrlFormat = @"http://www.yaskawa.com.cn/ashx/download.aspx?action=selBig&BigID={0}";
        // {0} = second-level category id, {1} = product id, {2} = document type id.
        private const string DocumentListUrlFormat = @"http://www.yaskawa.com.cn/download/default.aspx?Bid=0&sel={0}&Sid={1}&ZL={2}";

        // Every row except the first one of the listing table.
        private const string FileRowsXPath = @"//table[@class=""dateTab""]/tr[position()>1]";
        private const string ProductOptionsXPath = @"/option[@value!=""""]";

        private const string ScraperName = "yaskawaScraper";
        private const string RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36";

        // Document types requested for every product: { id sent as the ZL query value, display name }.
        private static readonly string[,] DocumentTypes = new string[6, 2]{
            {"144396663052566528","样本"},
            {"144678138029277184","产品说明书"},
            {"144959613005987840","选购件说明书"},
            {"145241087982698496","工具"},
            {"145522562959409152","认证"},
            {"145804037936119808","其他"}
        };

        /// <summary>
        /// Initialises the company and page descriptors of this scraper.
        /// </summary>
        public yaskawaScraper()
        {
            this.MainPage = "http://www.yaskawa.com.cn/download/Default.aspx";
            this.CompanyName = "安川电机中国有限公司";
            this.CompanyUrl = "http://www.yaskawa.com.cn/download/Default.aspx";
            this.ColumnName = "主页 -> 资料下载";
        }

        /// <summary>
        /// Reads the saved listing pages from the default dump directory and writes
        /// the extracted name / id / url rows into the workbook at <paramref name="path"/>.
        /// </summary>
        /// <param name="ea">Excel wrapper used to open, fill and save the workbook.</param>
        /// <param name="path">Workbook file that is opened and saved again under the same name.</param>
        public void GetDownloadUrls(ExcelWrapper.ExcelApp ea,string path)
        {
            GetDownloadUrls(ea, path, DefaultDumpDirectory);
        }

        /// <summary>
        /// Reads every file directly inside <paramref name="dumpDirectory"/> as UTF-8 HTML,
        /// extracts one row (name, id, url) per data row of the listing table and writes
        /// the rows into the workbook at <paramref name="path"/>, starting at cell A1.
        /// </summary>
        /// <param name="ea">Excel wrapper used to open, fill and save the workbook.</param>
        /// <param name="path">Workbook file that is opened and saved again under the same name.</param>
        /// <param name="dumpDirectory">Directory holding the saved listing pages; sub-directories are not searched.</param>
        /// <exception cref="DirectoryNotFoundException">The dump directory does not exist.</exception>
        public void GetDownloadUrls(ExcelWrapper.ExcelApp ea, string path, string dumpDirectory)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("name", typeof(string));
            dt.Columns.Add("id", typeof(string));
            dt.Columns.Add("url", typeof(string));

            foreach (string file in Directory.EnumerateFiles(dumpDirectory, "*.*", SearchOption.TopDirectoryOnly))
            {
                HtmlDoc doc = new HtmlDoc();
                doc.LoadHtml(File.ReadAllText(file, Encoding.UTF8));

                // SelectNodes yields null when nothing matches, e.g. for a file that is not a listing page.
                HtmlAgilityPack.HtmlNodeCollection rows = doc.DocumentNode.SelectNodes(FileRowsXPath);
                if (rows == null)
                    continue;

                foreach (HtmlAgilityPack.HtmlNode row in rows)
                {
                    HtmlAgilityPack.HtmlNode nameCell = row.SelectSingleNode("td[1]");
                    HtmlAgilityPack.HtmlNode idCell = row.SelectSingleNode("td[3]");
                    HtmlAgilityPack.HtmlNode link = row.SelectSingleNode("td[6]/a");
                    if (nameCell == null || idCell == null || link == null)
                        continue; // incomplete row: skipped

                    try
                    {
                        dt.Rows.Add(new object[] { nameCell.InnerText, idCell.InnerText, FormatUrl(link.GetAttributeValue("old", ""), SiteRootUrl) });
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one bad row must not abort the export, but leave a trace of it.
                        System.Diagnostics.Trace.TraceWarning(ScraperName + ": skipped a row of " + file + ": " + ex.Message);
                    }
                }
            }

            ea.OpenFile(path);
            ea.SetRangeValues(dt,true,"A1");
            ea.SaveAs(path);
        }

        /// <summary>
        /// Fills FileLinks with the fixed category tree and with one entry per file found
        /// for every combination of second-level category, product and document type.
        /// </summary>
        /// <param name="param">Not used by this scraper.</param>
        public override void GenerateAllLinks(string param)
        {
            // NOTE(review): the original looked the cookie container up and then discarded it,
            // so every request below is sent with a null cookie container. The lookup is kept
            // in case it has side effects - confirm whether the cookies should be sent instead.
            CookieManger.GetUriCookieContainer(new Uri(DownloadRootUrl));
            CookieContainer cookies = null;

            //big category
            FileLinks.Add(new FileLink("72339069014638592","","驱动控制事业","",false));
            FileLinks.Add(new FileLink("72620543991349248", "", "运动控制事业", "", false));
            FileLinks.Add(new FileLink("72902018968059904", "", "系统控制事业", "", false));
            FileLinks.Add(new FileLink("73464968921481216", "", "机器人事业", "", false));

            //second level; kept in a local array so the crawl below does not depend on
            //the positions these entries end up at inside FileLinks
            FileLink[] productGroups = new FileLink[]
            {
                new FileLink("72340168526266368", "", "变频器", "驱动控制事业", false),
                new FileLink("72341268037894144", "", "节能装置", "驱动控制事业", false),
                new FileLink("72621643502977024", "", "伺服驱动", "运动控制事业", false),
                new FileLink("72622743014604800", "", "控制器", "运动控制事业", false),
                new FileLink("72903118479687680", "", "高压变频器", "系统控制事业", false),
                new FileLink("72904217991315456", "", "各类系统备件", "系统控制事业", false),
                new FileLink("73466068433108992", "", "多功能机器人", "机器人事业", false),
                new FileLink("73467167944736768", "", "产业用机器人", "机器人事业", false)
            };
            foreach (FileLink productGroup in productGroups)
                FileLinks.Add(productGroup);

            //get third level ids
            bool f = false; // only passed to UpdateMsg by reference; its value is never read here
            foreach (FileLink productGroup in productGroups)
            {
                string subCategoryUrl = string.Format(SubCategoryUrlFormat, productGroup.Id);
                UpdateMsg(ScraperName, subCategoryUrl, ref f);

                string html = FetchYaskawaHtml(subCategoryUrl, cookies);
                if (string.IsNullOrEmpty(html))
                    continue; // nothing came back for this category

                HtmlDoc doc = new HtmlDoc();
                doc.LoadHtml(html);

                HtmlAgilityPack.HtmlNodeCollection options = doc.DocumentNode.SelectNodes(ProductOptionsXPath);
                if (options == null)
                    continue; // no product with a non-empty value in the response

                foreach (HtmlAgilityPack.HtmlNode option in options)
                {
                    // The product name is read from the node following the option element.
                    // NOTE(review): presumably HtmlAgilityPack parses <option> as an empty
                    // element by default, leaving its text in the next sibling - confirm.
                    string productName = option.NextSibling != null ? option.NextSibling.InnerText : "";
                    string subid = option.GetAttributeValue("value", "");
                    string categoryPath = productGroup.CategoryPath + "->" + productGroup.Name + "->" + productName;

                    //get docs
                    for (int j = DocumentTypes.GetLowerBound(0); j <= DocumentTypes.GetUpperBound(0); j++)
                    {
                        string url = string.Format(DocumentListUrlFormat, productGroup.Id, subid, DocumentTypes[j, 0]);
                        UpdateMsg(ScraperName, url, ref f);
                        CollectDocumentLinks(url, categoryPath, DocumentTypes[j, 1], cookies);
                    }
                }
            }
        }

        // Downloads one listing page and appends a FileLink for every complete data row of its table.
        private void CollectDocumentLinks(string url, string categoryPath, string documentType, CookieContainer cookies)
        {
            string html = FetchYaskawaHtml(url, cookies);
            if (string.IsNullOrEmpty(html))
                return;

            HtmlDoc listing = new HtmlDoc();
            listing.LoadHtml(html);

            HtmlAgilityPack.HtmlNodeCollection rows = listing.DocumentNode.SelectNodes(FileRowsXPath);
            if (rows == null)
                return;

            foreach (HtmlAgilityPack.HtmlNode row in rows)
            {
                HtmlAgilityPack.HtmlNode nameCell = row.SelectSingleNode("td[1]");
                HtmlAgilityPack.HtmlNode idCell = row.SelectSingleNode("td[2]");
                HtmlAgilityPack.HtmlNode thirdCell = row.SelectSingleNode("td[3]");
                HtmlAgilityPack.HtmlNode fifthCell = row.SelectSingleNode("td[5]");
                HtmlAgilityPack.HtmlNode link = row.SelectSingleNode("td[6]/a");
                if (nameCell == null || idCell == null || thirdCell == null || fifthCell == null || link == null)
                    continue; // incomplete row: skipped

                try
                {
                    FileLinks.Add(new FileLink(
                            idCell.InnerText, //productname as id
                            FormatUrl(link.GetAttributeValue("old", ""), SiteRootUrl),//url
                            nameCell.InnerText, //name
                            categoryPath,
                            true,
                            documentType,
                            fifthCell.InnerText,
                            thirdCell.InnerText
                        ));
                }
                catch (Exception ex)
                {
                    // Best effort: one bad row must not abort the crawl, but leave a trace of it.
                    System.Diagnostics.Trace.TraceWarning(ScraperName + ": skipped a row of " + url + ": " + ex.Message);
                }
            }
        }

        // Requests a page of the Yaskawa site with the fixed set of request values used throughout this scraper.
        private static string FetchYaskawaHtml(string url, CookieContainer cookies)
        {
            return WebFuncs.OpenUrlEx(url,
                "UTF-8",
                RequestUserAgent,
                "gzip, deflate, sdch",
                "zh-CN,zh;q=0.8",
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "www.yaskawa.com.cn", "", cookies);
        }

    }
}
